001    /*
002     * CrochemoreLandauZivUkelsonGlobalAlignment.java
003     *
004     * Copyright 2003 Sergio Anibal de Carvalho Junior
005     *
006     * This file is part of NeoBio.
007     *
008     * NeoBio is free software; you can redistribute it and/or modify it under the terms of
009     * the GNU General Public License as published by the Free Software Foundation; either
010     * version 2 of the License, or (at your option) any later version.
011     *
012     * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
013     * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
014     * PURPOSE. See the GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License along with NeoBio;
017     * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018     * Boston, MA 02111-1307, USA.
019     *
020     * Proper attribution of the author as the source of the software would be appreciated.
021     *
022     * Sergio Anibal de Carvalho Junior             mailto:sergioanibaljr@users.sourceforge.net
023     * Department of Computer Science               http://www.dcs.kcl.ac.uk
024     * King's College London, UK                    http://www.kcl.ac.uk
025     *
026     * Please visit http://neobio.sourceforge.net
027     *
028     * This project was supervised by Professor Maxime Crochemore.
029     *
030     */
031    
032    package neobio.alignment;
033    
034    /**
035     * This class implements the <B>global</B> pairwise sequence alignment algorithm (with
036     * linear gap penalty function) due to Maxime Crochemore, Gad Landau and Michal
037     * Ziv-Ukelson (2002).
038     *
039     * <P>This implementation derives from the paper of M.Crochemore, G.Landau and
040     * M.Ziv-Ukelson, <I>A Sub-quadratic Sequence Alignment Algorithm for Unrestricted Scoring
041     * Matrices</I> (available here as
042     * <A HREF="doc-files/Crochemore_Landau_Ziv-Ukelson_algorithm.pdf">PDF</A> or
043     * <A HREF="doc-files/Crochemore_Landau_Ziv-Ukelson_algorithm.pdf">Postscript</A>).</P>
044     *
045     * <P>For a general description of the algorithm, please refer to the specification of the
046     * abstract {@linkplain CrochemoreLandauZivUkelson} superclass.</P>
047     *
048     * <P>This class consist mainly of methods that:</P>
049     *
050     * <LU>
051     * <LI>create and compute all information of a block (see {@link #createBlock createBlock}
052     * and its variants);
053     * <LI>compute the output border of a block (see {@link #computeOutputBorder
054     * computeOutputBorder};
055     * <LI>locate the score of a high scoring global alignment in the block table (see {@link
056     * #locateScore locateScore};
057     * <LI>build an optimal global alignment from the information stored in the block table
058     * (see {@link #buildOptimalAlignment buildOptimalAlignment}.
059     * </LU>
060     *
061     * @see CrochemoreLandauZivUkelson
062     * @see CrochemoreLandauZivUkelsonLocalAlignment
063     * @author Sergio A. de Carvalho Jr.
064     */
065    public class CrochemoreLandauZivUkelsonGlobalAlignment extends CrochemoreLandauZivUkelson
066    {
067            /**
068             * Creates and computes all information of an alignment block. Its main job is to
069             * compute the DIST column for the block. It then request the
070             * <CODE>computeOutputBorder</CODE> method to compute the block's output border.
071             *
072             * @param factor1 factor of the first sequence
073             * @param factor2 factor of the second sequence
074             * @param row row index of the block in the block table
075             * @param col column index of the block in the block table
076             * @return the computed block
077             * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible
078             * with the sequences being aligned
079             */
080            protected AlignmentBlock createBlock (Factor factor1, Factor factor2, int row,
081                    int col) throws IncompatibleScoringSchemeException
082            {
083                    AlignmentBlock  block, left_prefix, diag_prefix, top_prefix;
084                    int                             size, lr, lc, score_ins, score_sub, score_del, ins, del, sub, max;
085    
086                    lr = factor1.length();
087                    lc = factor2.length();
088                    size = lr + lc + 1;
089    
090                    block = new AlignmentBlock (factor1, factor2, size);
091    
092                    // set up pointers to prefixes
093                    left_prefix = getLeftPrefix (block);
094                    diag_prefix = getDiagonalPrefix (block);
095                    top_prefix  = getTopPrefix (block);
096    
097                    // compute scores
098                    score_ins = scoreInsertion (factor2.getNewChar());
099                    score_sub = scoreSubstitution (factor1.getNewChar(), factor2.getNewChar());
100                    score_del = scoreDeletion (factor1.getNewChar());
101    
102                    // compute dist column and direction
103                    for (int i = 0; i < size; i++)
104                    {
105                            // compute optimal path to
106                            // input border's ith position
107    
108                            ins = sub = del = Integer.MIN_VALUE;
109    
110                            if (i < size - 1)
111                                    ins = left_prefix.dist_column[i] + score_ins;
112    
113                            if ((i > 0) && (i < size - 1))
114                                    sub = diag_prefix.dist_column[i - 1] + score_sub;
115    
116                            if (i > 0)
117                                    del = top_prefix.dist_column[i - 1] + score_del;
118    
119                            block.dist_column[i] = max = max (ins, sub, del);
120    
121                            // record the direction to of the optimal
122                            // path to input border's ith position
123                            if (max == ins)
124                                    block.direction[i] = LEFT_DIRECTION;
125                            else if (max == sub)
126                                    block.direction[i] = DIAGONAL_DIRECTION;
127                            else
128                                    block.direction[i] = TOP_DIRECTION;
129                    }
130    
131                    computeOutputBorder (block, row, col, size, lc, lr);
132    
133                    return block;
134            }
135    
136            /**
137             * Creates the root block. This is a special case of the <CODE>createBlock</CODE>
138             * method. No information is actually computed.
139             *
140             * @param factor1 factor of the first sequence
141             * @param factor2 factor of the second sequence
142             * @return the root block
143             */
144            protected AlignmentBlock createRootBlock (Factor factor1, Factor factor2)
145            {
146                    return new AlignmentBlock (factor1, factor2);
147            }
148    
149            /**
150             * Creates and computes all information of an alignment block of the first row of the
151             * block table. This is a special case of the <CODE>createBlock</CODE> method.
152             *
153             * @param factor1 factor of the first sequence
154             * @param factor2 factor of the second sequence
155             * @param col column index of the block in the block table
156             * @return the computed block
157             * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible
158             * with the sequences being aligned
159             * @see #createBlock createBlock
160             */
161            protected AlignmentBlock createFirstRowBlock (Factor factor1, Factor factor2, int col)
162                    throws IncompatibleScoringSchemeException
163            {
164                    AlignmentBlock  block, left_prefix;
165                    int                             size, lr, lc, score_ins;
166    
167                    lr = 0; // factor1.length();
168                    lc = factor2.length();
169                    size = lr + lc + 1;
170    
171                    block = new AlignmentBlock (factor1, factor2, size);
172    
173                    // set up pointer to left prefix
174                    left_prefix = getLeftPrefix (block);
175    
176                    // compute insertion's score
177                    score_ins = scoreInsertion (factor2.getNewChar());
178    
179                    // compute dist column and direction
180                    for (int i = 0; i < lc; i++)
181                    {
182                            block.dist_column[i] = left_prefix.dist_column[i] + score_ins;
183                            block.direction[i] = LEFT_DIRECTION;
184                    }
185    
186                    // last position
187                    block.dist_column[lc] = 0;
188                    block.direction[lc] = STOP_DIRECTION;
189    
190                    computeOutputBorder (block, 0, col, size, lc, lr);
191    
192                    return block;
193            }
194    
195            /**
196             * Creates and computes all information of an alignment block of the first column of
197             * the block table. This is a special case of the <CODE>createBlock</CODE> method.
198             *
199             * @param factor1 factor of the first sequence
200             * @param factor2 factor of the second sequence
201             * @param row row index of the block in the block table
202             * @return the computed block
203             * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible
204             * with the sequences being aligned
205             * @see #createBlock createBlock
206             */
207            protected AlignmentBlock createFirstColumnBlock (Factor factor1, Factor factor2,
208                    int row) throws IncompatibleScoringSchemeException
209            {
210                    AlignmentBlock  block, top_prefix;
211                    int                             size, lr, lc, score_del;
212    
213                    lr = factor1.length();
214                    lc = 0; // factor2.length();
215                    size = lr + lc + 1;
216    
217                    block = new AlignmentBlock (factor1, factor2, size);
218    
219                    // set up pointer to top prefix
220                    top_prefix = getTopPrefix (block);
221    
222                    // compute deletion's score
223                    score_del = scoreDeletion (factor1.getNewChar());
224    
225                    // first position
226                    block.dist_column[0] = 0;
227                    block.direction[0] = STOP_DIRECTION;
228    
229                    // compute dist column and direction
230                    for (int i = 1; i < size; i++)
231                    {
232                            block.dist_column[i] = top_prefix.dist_column[i - 1] + score_del;
233                            block.direction[i] = TOP_DIRECTION;
234                    }
235    
236                    computeOutputBorder (block, row, 0, size, lc, lr);
237    
238                    return block;
239            }
240    
241            /**
242             * Computes the output border of a block. This is performed in five steps:
243             *
244             * <LU>
245             * <LI>Retrieve the block's input border;
246             * <LI>Retrieve the block's complete DIST matrix;
247             * <LI>Create an interface to the {@linkplain OutMatrix OUT} matrix from the input
248             * border and DIST matrix;
249             * <LI>Use {@linkplain Smawk SMAWK} to compute all column maxima of the OUT matrix
250             * (SMAWK finds the index of the row that contains the maximum value of a column);
251             * <LI>Assemble the output border by extracting the maximum values of each column of
252             * the OUT matrix using the information obtained in the previous step.
253             * </LU>
254             *
255             * @param block the block for which the output border is to be computed
256             * @param row row index of the block in the block table
257             * @param col column index of the block in the block table
258             * @param dim dimension of the output border
259             * @param lc number of columns of the block
260             * @param lr number of row of the block
261             */
262            protected void computeOutputBorder (AlignmentBlock block, int row, int col, int dim,
263                    int lc, int lr)
264            {
265                    int[] input = assembleInputBorder (dim, row, col, lr);
266    
267                    int[][] dist = assembleDistMatrix (block, dim, row, col, lc);
268    
269                    // update the interface to the OUT matrix
270                    out_matrix.setData (dist, input, dim, lc);
271    
272                    // compute source_path using Smawk
273                    smawk.computeColumnMaxima (out_matrix, block.source_path);
274    
275                    // update output border
276                    for (int i = 0; i < dim; i++)
277                            block.output_border[i] = out_matrix.valueAt(block.source_path[i], i);
278            }
279    
280            /**
281             * Builds an optimal global alignment between the loaded sequences after the block
282             * table has been computed. This method traces a path back in the block table, from
283             * the last block to the first.
284             *
285             * @return an optimal global alignment
286             * @throws IncompatibleScoringSchemeException If the scoring scheme is not compatible
287             * with the loaded sequences.
288             * @see CrochemoreLandauZivUkelson#traverseBlock
289             */
290            protected PairwiseAlignment buildOptimalAlignment ()
291                    throws IncompatibleScoringSchemeException
292            {
293                    AlignmentBlock  block, ancestor;
294                    StringBuffer    gapped_seq1, tag_line, gapped_seq2;
295                    int                             source, dest, ancestor_source;
296                    int                             row, col;
297    
298                    gapped_seq1     = new StringBuffer();
299                    tag_line        = new StringBuffer();
300                    gapped_seq2     = new StringBuffer();
301    
302                    // start at the last row, last column of block table
303                    row       = num_rows - 1; col = num_cols - 1;
304                    block = block_table[row][col];
305                    dest  = block.factor2.length();
306    
307                    while (row > 0 || col > 0)
308                    {
309                            block    = block_table[row][col];
310                            source   = block.source_path[dest];
311                            ancestor = block.ancestor[dest];
312    
313                            ancestor_source = source;
314                            if (dest > block.factor2.length())
315                                    ancestor_source -= (block.factor1.length() - ancestor.factor1.length());
316    
317                            traverseBlock (ancestor, ancestor_source, gapped_seq1, tag_line, gapped_seq2);
318    
319                            if (row == 0)
320                            {
321                                    col = col - 1;
322                                    dest = block_table[row][col].factor2.length();
323                            }
324                            else if (col == 0)
325                            {
326                                    row = row - 1;
327                                    dest = 0;
328                            }
329                            else
330                            {
331                                    if (source < block.factor1.length())
332                                    {
333                                            col = col - 1;
334                                            dest = block_table[row][col].factor2.length() + source;
335                                    }
336                                    else if (source == block.factor1.length())
337                                    {
338                                            row = row - 1; col = col - 1;
339                                            dest = block_table[row][col].factor2.length();
340                                    }
341                                    else
342                                    {
343                                            row = row - 1;
344                                            dest = source - block.factor1.length();
345                                    }
346                            }
347                    }
348    
349                    return new PairwiseAlignment (gapped_seq1.toString(), tag_line.toString(),
350                            gapped_seq2.toString(), locateScore());
351            }
352    
353            /**
354             * Locate the score of the highest scoring global alignment in the block table. This
355             * value is found in the output border of the last block (last row, last column).
356             *
357             * @return the score of the highest scoring global alignment
358             */
359            protected int locateScore ()
360            {
361                    AlignmentBlock last_block = block_table[num_rows - 1][num_cols - 1];
362    
363                    return last_block.output_border[last_block.factor2.length()];
364            }
365    }